(* Adobe's PageMill 1.0 is a nearly-great application for quickly and
efficiently creating World Wide Web pages that look good in common browsers
such as Netscape. It is only *nearly*-great, however, since it makes its own
set of assumptions and some downright mistakes in HTML code.

The following script will open PageMill documents using BBEdit 3.5 (the
commercial version only -- I don't think the Lite version supports
AppleScript).

Some of the Find/Replace patterns change true errors in the PageMill code
(significantly, the double_BR pattern, Pattern 8, changes the notorious
<BR><BR> code to the more standard <P>). However, many of the Find/Replace
patterns are cosmetic only, and do not affect the final HTML code.

Important!!!! Note that if you, for any reason, want to keep runs of multiple
spaces (say to preserve whitespace using the <PRE>
tag), you must comment out the elimDoubleSpace() call in cleanUpPageMill
(labelled "Pattern 3" in its handler; this header used to call it "the fourth
pattern")!

The following script is "e-mail-ware." If you like it, send me a friendly
message. If you have suggestions for improvements, I want to hear from you
even more!

Clint MacDonald
Cell Biology & Biochemistry
Texas Tech University Health Sciences Center
3601 4th Street
Lubbock, TX 79430
March 27, 1996
*)

-- ----------

-- Set by checkHTML(); a document is only cleaned when both are true.
property HTML_File : false
property HTML_File_End : false
global docList

-- ----------

-- Script launched directly: work on whatever is selected in the Finder.
on run
	tell application "Finder"
		activate
		set docList to selection
	end tell
	processDocuments(docList)
end run

-- ----------

-- Files dropped onto the script: work on the dropped items.
on open (docList)
	processDocuments(docList)
end open

-- ----------

-- Private helper shared by the run and open handlers (their bodies were
-- identical). Opens each item of theDocs in BBEdit, runs checkHTML(), and
-- either cleans the document or reports that it does not look like HTML and
-- quits. Afterwards offers to save every open BBEdit window.
on processDocuments(theDocs)
	tell application "BBEdit 3.5"
		activate
		
		repeat with i from 1 to (count items in theDocs)
			open (item i of theDocs as alias)
			checkHTML() of me
			if HTML_File and HTML_File_End then
				cleanUpPageMill(theDocs) of me
			else
				display dialog "The file " & (item i of theDocs) & " does not seem to be an HTML file." buttons {"OK"} default button 1
				tell me to quit
			end if
		end repeat
		
		display dialog "Would you like to save the changes to all files?" buttons {"Yes", "Later"} default button 2
		if button returned of result is "Yes" then
			repeat with i from 1 to (count of windows)
				go to line 0
				tell window i to save
			end repeat
		else
			go to line 0
		end if
	end tell -- BBEdit
end processDocuments

-- ----------

-- Runs every clean-up pattern, in order, against the document BBEdit has
-- open. The docList parameter is accepted for the callers' sake but is not
-- used inside this handler.
on cleanUpPageMill(docList)
	-- here's the meat of the script!
	-- to omit any individual pattern, comment out its subroutine here
	eraseWhiteSpace() of me
	HRline() of me
	elimDoubleSpace() of me -- comment out if using <PRE> tags
	spaceBR() of me
	endHeader() of me
	orphaned_P() of me
	addressBR() of me
	double_BR() of me
	eraseNATSIZEFLAG() of me
	isolateHeadBody() of me
	tooManyReturns() of me
	
end cleanUpPageMill

-- ----------

on checkHTML()
	-- Runs two finds in BBEdit and stores their results in the HTML_File and
	-- HTML_File_End properties; the callers only clean a document when both
	-- results are true. The insertion point is sent back to line 0 before
	-- and after the searches.
	tell application "BBEdit 3.5"
		
		go to line 0
		-- NOTE(review): both search strings were empty in the copy this was
		-- restored from (the tags had been stripped). "<HTML>" and "</HTML>"
		-- are inferred from the property names and from the "does not seem
		-- to be an HTML file" dialog -- confirm against an original copy.
		find "<HTML>"
		set HTML_File to the result
		find "</HTML>" without grep
		set HTML_File_End to the result
		go to line 0
		
	end tell --BBEdit
end checkHTML

-- ----------

on eraseWhiteSpace()
	-- Pattern 1
	-- PageMill seems to like leaving spaces at the end of a line; this grep
	-- replace (a space, then any further spaces, anchored at end of line)
	-- deletes every such run.
	tell application "BBEdit 3.5" to replace Every Occurrence searching for "  *$" using "" with grep
end eraseWhiteSpace

-- ----------

on HRline()
	tell application "BBEdit 3.5"
		-- Pattern 2
		-- this Find/Replace (with the one following it) places
		-- the <HR> tag on its own line
		-- NOTE(review): the <HR> tags and return characters inside these
		-- literals were stripped from the copy this was restored from. They
		-- are reconstructed from the comment above (first replace: break
		-- before <HR>; second: break after) -- confirm against an original.
		replace Every Occurrence searching for "><HR><" using ">\r<HR><"
		-- ----------
		replace Every Occurrence searching for "<HR>" using "<HR>\r"
	end tell --BBEdit
end HRline

-- ----------

on elimDoubleSpace()
	tell application "BBEdit 3.5"
		-- Pattern 3
		-- IMPORTANT!
		-- COMMENT THIS PATTERN OUT if you are using the <PRE>
		-- tags, or if for any other reason you want to keep multiple spaces
		-- this set of Find/Replaces eliminates two or more spaces
		-- in a row; note the grep pattern in the middle "space-space-space-*"
		-- which will find two or more spaces in a row.
		-- The first and last replaces park the indented <TITLE> behind a
		-- placeholder so the middle replace does not touch it.
		-- NOTE(review): runs of spaces in these literals were collapsed in
		-- the copy this was restored from. The middle pattern follows the
		-- comment above; the two-space indent before <TITLE> is assumed --
		-- confirm against an original.
		replace Every Occurrence searching for "  <TITLE>" using "%%%%%<TITLE>"
		replace Every Occurrence searching for "   *" using " " with grep
		replace Every Occurrence searching for "%%%%%<TITLE>" using "  <TITLE>"
	end tell --BBEdit
end elimDoubleSpace

-- ----------

on spaceBR()
	tell application "BBEdit 3.5"
		-- Pattern 4
		-- cleans up unnecessary spaces after periods
		-- (removes the spaces that sit in front of a <BR> or <P> tag)
		-- NOTE(review): the whitespace in these literals is kept exactly as
		-- found; the trailing blank may originally have been a return.
		replace Every Occurrence searching for " *<BR> " using "<BR> " with grep
		-- ----------
		replace Every Occurrence searching for " *<P> " using "<P> " with grep
	end tell --BBEdit
end spaceBR

-- ----------

on endHeader()
	tell application "BBEdit 3.5"
		-- Pattern 5
		-- PageMill likes to move the ending header tag to a new line
		-- by itself; this grep pattern removes the return character
		-- preceding any ending </H1>..</H6> tag
		-- (the return had been flattened to a space in the restored copy;
		-- "\r" is put back per the comment above)
		replace Every Occurrence searching for ">\r</H([1-6])>" using "></H\\1>" with grep
	end tell --BBEdit
end endHeader

-- ----------

on orphaned_P()
	tell application "BBEdit 3.5"
		-- Pattern 6
		-- adds a blank line before orphaned <P> tags
		-- NOTE(review): returns reconstructed from the comment above (a <P>
		-- starting its own line gets a second return in front of it) --
		-- confirm against an original.
		replace Every Occurrence searching for ">\r<P>" using ">\r\r<P>"
	end tell --BBEdit
end orphaned_P

-- ----------

on addressBR()
	tell application "BBEdit 3.5"
		-- Pattern 7
		-- I couldn't make PageMill place more than one <BR> tag before
		-- an <ADDRESS> tag, so I replace it with a <P>
		-- NOTE(review): the blank between the tags is kept as found; it may
		-- originally have been a return character.
		replace Every Occurrence searching for "<BR> <ADDRESS>" using "<P> <ADDRESS>"
	end tell --BBEdit
end addressBR

-- ----------

on double_BR()
	tell application "BBEdit 3.5"
		-- Pattern 8
		-- this Find/Replace eliminates the dreaded "double-<BR>" tags
		-- which PageMill always inserts instead of the more correct <P>
		-- NOTE(review): the blanks in these literals are kept as found; they
		-- may originally have been return characters.
		replace Every Occurrence searching for "<BR> <BR>" using "<P> "
	end tell --BBEdit
end double_BR

-- ----------

on eraseNATSIZEFLAG()
	tell application "BBEdit 3.5"
		-- Pattern 9
		-- eliminates the NATURALSIZEFLAG attribute from <IMG> tags
		-- Each replace covers a different spacing around the "=" sign.
		-- NOTE(review): whitespace is kept as found; the second and fifth
		-- patterns now read identically, so one of them presumably held a
		-- return (or a different number of spaces) originally.
		replace Every Occurrence searching for " NATURALSIZEFLAG= \"[0-3]\"" using "" with grep
		replace Every Occurrence searching for " NATURALSIZEFLAG=\"[0-3]\"" using "" with grep
		replace Every Occurrence searching for " NATURALSIZEFLAG =\"[0-3]\"" using "" with grep
		replace Every Occurrence searching for " NATURALSIZEFLAG=\"[0-3]\" " using "" with grep
		replace Every Occurrence searching for " NATURALSIZEFLAG=\"[0-3]\"" using "" with grep
	end tell --BBEdit
end eraseNATSIZEFLAG

-- ----------

on isolateHeadBody()
	tell application "BBEdit 3.5"
		-- Pattern 10
		-- adds a line between the Header and Body sections
		-- adds a line between the last line of the text and the end
		-- of the Body
		-- NOTE(review): the return characters were flattened to spaces in
		-- the copy this was restored from; they are reconstructed from the
		-- comments above -- confirm against an original.
		replace Every Occurrence searching for "</HEAD>\r<BODY(.*)>" using "</HEAD>\r\r<BODY\\1>\r" with grep
		-- ----------
		replace Every Occurrence searching for "</BODY>\r</HTML>" using "\r</BODY>\r</HTML>"
	end tell --BBEdit
end isolateHeadBody

-- ----------

on tooManyReturns()
	tell application "BBEdit 3.5"
		-- Pattern 11
		-- eliminates extra returns that might have crept in
		-- NOTE(review): the restored copy showed only " *" -> " ", which as
		-- a grep pattern can match nothing at all. Reconstructed as "three
		-- or more returns become two", so the blank lines added by the
		-- earlier patterns survive -- confirm against an original.
		replace Every Occurrence searching for "\r\r\r\r*" using "\r\r" with grep
	end tell -- BBEdit
end tooManyReturns